# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
import pymysql
import re
import json
from lxml import etree


class MeishiPipeline:

	# 获取数据库连接
	def open_spider(self, spider):
		try :
			db = spider.settings.get('MYSQL_DB_NAME','test')
			host = spider.settings.get('MYSQL_HOST', 'localhost')
			port = spider.settings.get('MYSQL_PORT', 3306)
			user = spider.settings.get('MYSQL_USER', 'root')
			passwd = spider.settings.get('MYSQL_PASSWORD', '1234')

			# 建立数据库连接
			self.db_conn = pymysql.connect(host=host, port=port, db=db, user=user, passwd=passwd, charset='utf8')
			# 获取游标
			self.cursor = self.db_conn.cursor()
		except:
			print("pymysql connect have something wrong")

	def __init__(self):
		pass

	# 处理数据
	def process_item(self,item,spider):
		try :
			# 数据处理
			# 去除字符串中的空格，用“”替换空格
			item['technology'] = item['technology'].replace(" ","")	# 制作工艺
			item['flavor'] = item['flavor'].replace(" ","")			# 口味
			item['makeTime'] = item['makeTime'].replace(" ","")		# 制作时长
			item['makeStepCount'] = item['makeStepCount'].replace(" ","")	# 制作步数
		except :
			print("Remove spaces have something wrong")

		try :
			# 数组封装成json
			# if len(item['benefit'])==0:
			# 	item['benefit']=None
			# else:
			item['benefit'] = json.dumps(item['benefit'],ensure_ascii=False)	#益处
			item['mainIngredients'] = json.dumps(item['mainIngredients'],ensure_ascii=False)	# 主要食材
			# if len(item['auxiliaryIngredients'])==0:
			# 	item['auxiliaryIngredients']=None
			# else:
			item['auxiliaryIngredients'] = json.dumps(item['auxiliaryIngredients'],ensure_ascii=False)	#辅助食材
		except :
			print("Package to json have something wrong")

		try :
			# 字符串中提取数字
			item['makeStepCount'] = re.findall(r"\d+",item['makeStepCount'])	#制作步数
			item['popularity'] = re.findall(r"\d+",item['popularity'])		# 人气
			item['comment'] = re.findall(r"\d+",item['comment'])		# 评论数
		except:
			print("extract number have something wrong")

		self.insert_data(item)
		return item

	# 插入数据
	def insert_data(self,item):

		# 一级分类，二级分类，名字，制作工艺，口味，制作时长，制作步数，人气值，评论数，益处，描述，主要食材，辅助食材，图片URI
		data = (None,
			item['TypeOne'],
			item['TypeTwo'],
			item['name'],
			item['technology'],
			item['flavor'],
			item['makeTime'],
			item['makeStepCount'],
			item['popularity'],
			item['comment'],
			item['benefit'],
			item['describe'],
			item['mainIngredients'],
			item['auxiliaryIngredients'],
			item['image'])

		sql = "insert into meishi values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"

		try:
			# 提交插入数据
			self.cursor.execute(sql,data)
			self.db_conn.commit()
			print("insert OK")
		except :
			print("db commit wrong")	# 报错
			self.db_conn.rollback()		# 回滚数据


	# 关闭数据库连接
	def close_spider(self,spider):
		self.cursor.close()
		self.db_conn.close()


#-分割线--分割线--分割线--分割线--分割线--分割线--分割线--分割线--分割线--分割线--分割线--分割线--分割线--分割线--分割线-


class MztrPipeline:
	# 获取数据库连接
	def open_spider(self, spider):
		try :
			db = spider.settings.get('MYSQL_DB_NAME','test')
			host = spider.settings.get('MYSQL_HOST', 'localhost')
			port = spider.settings.get('MYSQL_PORT', 3306)
			user = spider.settings.get('MYSQL_USER', 'root')
			passwd = spider.settings.get('MYSQL_PASSWORD', '1234')

			# 建立数据库连接
			self.db_conn = pymysql.connect(host=host, port=port, db=db, user=user, passwd=passwd, charset='utf8')
			# 获取游标
			self.cursor = self.db_conn.cursor()
			pirnt("mysql connet successful")
		except:
			print("pymysql connect have something wrong")

	# 处理数据
	def process_item(self,item,spider):
		try :
			string = item['str']
			# jxxi = etree.HTML(string)
			# jx = jxxi.xpath("//script/text()")[5]
			# hide_str = jx+""
			hide_dict = string[19:-1]
			hide_json = json.loads(hide_dict)
			cityName = hide_json['cityName']
			shop_list = hide_json['poiLists']['poiInfos']
			for shop in shop_list:
				mztrItem = MztrItem()
				mztrItem['shopId'] = shop['poiId'],
				mztrItem['shopName'] = shop['title'],
				mztrItem['cityName'] = cityName,
				mztrItem['avgScore'] = shop['avgScore'],
				mztrItem['allCommentNum'] = shop['allCommentNum'],
				mztrItem['address'] = shop['address'],
				mztrItem['avgPrice'] = shop['avgPrice'],
				mztrItem['frontimg'] =shop['frontImg']
				pirnt("data process ok ")
				insert_data(mztrItem)
				yield
			print("data process all OK ")
		except:
			print("data process have wrong")


	# 插入数据
	def insert_data(self,item,spider):
		data = (None,
			item['shopId'],
			item['shopName'],
			item['cityName'],
			# item['shopstStyle'],
			item['avgScore'],
			item['allCommentNum'],
			item['address'],
			item['avgPrice'],
			item['frontimg'])
		sql = "insert into mztr(id,shopId,shopName,cityName,avgScore,allCommentNum,address,avgPrice,frontimg) values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
		try:
			# 提交插入数据
			self.cursor.execute(sql,data)
			self.db_conn.commit()
			print("insert OK")
		except :
			print("db commit wrong")	# 报错
			self.db_conn.rollback()		# 回滚数据

	# 关闭数据库连接
	def close_spider(self,spider):
		self.cursor.close()
		self.db_conn.close()
